{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第三题：支持向量机的分类任务\n",
    "\n",
    "实验内容：\n",
    "1. 使用支持向量机完成spambase垃圾邮件分类任务\n",
    "2. 使用训练集训练模型，计算测试集的精度，查准率，查全率，F1值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 请你使用SVC，完成spambase分类任务\n",
    "\n",
    "要求：使用全部特征，完成以下内容的填写\n",
    "\n",
    "###### 双击此处填写\n",
    "\n",
    "核函数 | C | 精度 | 查准率 | 查全率 | F1\n",
    "- | - | - | - | - | -\n",
    "rbf | 0.1 | 0.0 | 0.0 | 0.0 | 0.0\n",
    "rbf | 1 | 0.0 | 0.0 | 0.0 | 0.0\n",
    "linear | 0.1 | 0.0 | 0.0 | 0.0 | 0.0\n",
    "linear | 1 | 0.0 | 0.0 | 0.0 | 0.0\n",
    "sigmoid | 0.1 | 0.0 | 0.0 | 0.0 | 0.0\n",
    "sigmoid | 1 | 0.0 | 0.0 | 0.0 | 0.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "OSError",
     "evalue": "data/spambase/spambase.data not found.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mOSError\u001b[0m                                   Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-1-94c8cc6526df>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 导入数据\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloadtxt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'data/spambase/spambase.data'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdelimiter\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\",\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[0mspamx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;36m57\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0mspamy\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m57\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda3\\lib\\site-packages\\numpy\\lib\\npyio.py\u001b[0m in \u001b[0;36mloadtxt\u001b[1;34m(fname, dtype, comments, delimiter, converters, skiprows, usecols, unpack, ndmin, encoding, max_rows)\u001b[0m\n\u001b[0;32m    979\u001b[0m             \u001b[0mfname\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mos_fspath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    980\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0m_is_string_like\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 981\u001b[1;33m             \u001b[0mfh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_datasource\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'rt'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    982\u001b[0m             \u001b[0mfencoding\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'encoding'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'latin1'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    983\u001b[0m             \u001b[0mfh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0miter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfh\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda3\\lib\\site-packages\\numpy\\lib\\_datasource.py\u001b[0m in \u001b[0;36mopen\u001b[1;34m(path, mode, destpath, encoding, newline)\u001b[0m\n\u001b[0;32m    267\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    268\u001b[0m     \u001b[0mds\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDataSource\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdestpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 269\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnewline\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnewline\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    270\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    271\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda3\\lib\\site-packages\\numpy\\lib\\_datasource.py\u001b[0m in \u001b[0;36mopen\u001b[1;34m(self, path, mode, encoding, newline)\u001b[0m\n\u001b[0;32m    621\u001b[0m                                       encoding=encoding, newline=newline)\n\u001b[0;32m    622\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 623\u001b[1;33m             \u001b[1;32mraise\u001b[0m \u001b[0mIOError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"%s not found.\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    624\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    625\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mOSError\u001b[0m: data/spambase/spambase.data not found."
     ]
    }
   ],
   "source": [
    "# 导入数据\n",
    "import numpy as np\n",
    "data = np.loadtxt('data/spambase/spambase.data', delimiter = \",\")\n",
    "spamx = data[:, :57]\n",
    "spamy = data[:, 57]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据集分割\n",
    "from sklearn.model_selection import train_test_split\n",
    "trainX, testX, trainY, testY = train_test_split(spamx, spamy, test_size = 0.3, random_state = 32)\n",
    "trainX.shape, trainY.shape, testX.shape, testY.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**注意：计算线性核的时候，要使用 LinearSVC 这个类，不要使用SVC(kernel = 'linear')。LinearSVC不需要设置kernel参数！**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 引入模型\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.svm import LinearSVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# YOUR CODE HERE\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 选做：比较LinearSVC和SVC(kernel = 'linear')的运行时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# YOUR CODE HERE\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
